library(Seurat)
library(data.table)
library(NMF)
library(rsvd)
library(Rtsne)
library(ggplot2)
library(cowplot)
library(sva)
library(igraph)
library(cccd)
library(KernSmooth)
library(beeswarm)
library(stringr)
library(formatR)
source("tools.R")
library(DESeq2)
Condition message Step 1: Ignore control condition,use all sample Step 2:Under the control condition Positive Negative Tube_1 Tube_2
human.only.pro <- Load_data(data_dir = "./human.txt")
# Create Seurat object and not caculate DESeq,but not set min.cells and
# min.genes
human.all.DESeq <- DESeq_SeuratObj(X = human.only.pro, DESq = FALSE)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
all.sample.group <- as.character(human.all.DESeq@ident)
table(all.sample.group)
## all.sample.group
## hc001 hc006 hc009 hc012 hc017 hc018 hc020 hc021
## 15 92 187 59 188 188 157 158
## shoutiao
## 21
Group_Bar(human.all.DESeq@raw.data, group = all.sample.group)
# We are interested in the gene ITGB4
GenePlot(human.all.DESeq, gene1 = "ITGB4", gene2 = human.all.DESeq@var.genes[1])
VlnPlot(human.all.DESeq, features.plot = "ITGB4", y.lab.rot = 90) # Violinn plot of gene ITGB in all sample
VlnPlot(human.all.DESeq, features.plot = human.all.DESeq@var.genes[1:6], y.lab.rot = 90) # Violinn plot of gene ITGB in all sample
According to the plot of explore data,we can know that the gene ITGB4 is expressed differently across the sample group.It is fit to what we are interested.And,across the sample(Barplot),there are many genes that actually expressed different.Next,based on the explore plot,we will analysis data deeply with other method
Here,do the dimensionality reduction using the PCA, tSNE method
all.pbmc <- PCA.TSNE(object = human.all.DESeq, pcs.compute = FALSE)
FeaturePlot(object = all.pbmc, features.plot = "ITGB4", pt.size = 4, no.legend = FALSE) # ITGB4 gene in part dataset
FeaturePlot(object = all.pbmc, features.plot = all.pbmc@var.genes[1:4], pt.size = 4,
no.legend = FALSE) # ITGB4 gene in part dataset
DimPlot(all.pbmc, reduction.use = "tsne", pt.size = 4) # grour by sample
DimPlot(all.pbmc, reduction.use = "pca", pt.size = 4) # grour by sample
DimHeatmap(all.pbmc, reduction.type = "pca", check.plot = FALSE)
FeatureHeatmap(all.pbmc, features.plot = "ITGB4", pt.size = 3, plot.horiz = TRUE,
cols.use = c("lightgrey", "blue"))
The Faetureplot of ITGB4 shows that,it only has high expression level in few samples,and expresss lowly in most sample.It means that may be ITGB express differently across sample.And the FeatureHeatmap and Heamap also comfirm this phenomeno.We try the other four variable genes,which has the similar result as gene ITGB4 But the tSNE and * PCA * plot show that, the sample can not be split apparently.The result may be is not good based on the PCA and tSNE method.
Next,we will have analysis on gene differential expression.Find maker genes across sample.We use the method: **wilcox test**
# Finds markers (differentially expressed genes) for each of the identity
# classes in a dataset
all_markers <- FindAllMarkers(all.pbmc, test.use = "bimod", print.bar = FALSE)
head(all_markers)
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster
## RIN2 3.256120e-14 1.7883358 1.000 0.477 1.399969e-09 hc001
## ZBED5-AS1 1.794783e-12 1.6436768 0.933 0.190 7.716671e-08 hc001
## CDC42 2.374445e-12 1.1676537 0.933 0.509 1.020892e-07 hc001
## PTRH2 4.717402e-12 -0.3334440 0.667 0.190 2.028247e-07 hc001
## IPO8 8.368004e-12 -0.4923777 0.467 0.103 3.597823e-07 hc001
## MAP1B 8.961534e-12 -0.6665131 0.533 0.220 3.853012e-07 hc001
## gene
## RIN2 RIN2
## ZBED5-AS1 ZBED5-AS1
## CDC42 CDC42
## PTRH2 PTRH2
## IPO8 IPO8
## MAP1B MAP1B
"ITGB4" %in% all_markers$gene
## [1] TRUE
human.heatmap <- Heatmap_fun(genes = c("ITGB4", "ISCU", "STX4", "WDR89", "AVIL",
"TBC1D5", "ANKRD49", "EDRF1", "CFLAR", "SEPT10", "STOML2", "C18orf8", "MIA3"),
tpm.data = all.pbmc@scale.data, condition = unique(as.character(all.pbmc@ident)),
all.condition = as.character(all.pbmc@ident))
## There ara 9 conditions
## Whether creat data accurate 0
NMF::aheatmap(human.heatmap[[2]], Rowv = NA, Colv = NA, annCol = human.heatmap[[1]],
scale = "none")
** Positive,Negative,Tube_1,Tube_2** analysis
# Read condition data
human.condition.pro <- Load_data(data_dir = "./human.txt", condition.sample_dir = "./human.condition.sample.txt")
# Control condition
control.condition <- human.condition.pro[[2]]
control.sample.names <- colnames(human.condition.pro[[1]])
human.part.DESeq <- DESeq_SeuratObj(X = human.condition.pro[[1]], DESq = FALSE,
condition = control.condition, condition.name = control.sample.names)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
table(human.part.DESeq@ident)
##
## Negative Positive Tube_1 Tube_2
## 13 9 4 4
Group_Bar(human.part.DESeq@raw.data, group = human.part.DESeq@ident)
GenePlot(human.part.DESeq, gene1 = "ITGB4", gene2 = human.part.DESeq@var.genes[1])
# violon plot
VlnPlot(human.part.DESeq, features.plot = "ITGB4", y.lab.rot = 90) # Violinn plot of gene ITGB in all sample
VlnPlot(human.part.DESeq, features.plot = human.part.DESeq@var.genes[1:4], y.lab.rot = 90) # Violinn plot of gene ITGB in all sample
According to the figure explore,we know that gene ITGB4 expresses differently in the four control condition.It has more higher expression in ** Positive and Tube_1** control condition than other condition.And we try violin plot of the other four variable genes and bar plot of all genes ,they both have the similar result.
# PCA TSNE
part.pbmc <- PCA.TSNE(object = human.part.DESeq, pcs.compute = FALSE, num.pcs = 7)
FeaturePlot(object = part.pbmc, features.plot = "ITGB4", pt.size = 4, no.legend = FALSE) # ITGB4 gene in part dataset
FeaturePlot(object = part.pbmc, features.plot = part.pbmc@var.genes[1:4], pt.size = 4,
no.legend = FALSE) # ITGB4 gene in part dataset
DimPlot(part.pbmc, reduction.use = "tsne", pt.size = 4) # grour by sample
DimHeatmap(part.pbmc, reduction.type = "pca", check.plot = FALSE)
FeatureHeatmap(part.pbmc, features.plot = "ITGB4", pt.size = 3, plot.horiz = TRUE,
cols.use = c("lightgrey", "blue"))
The Faetureplot of ITGB4 shows that,it only has high expression level in few samples,and expresss lowly in most sample.It means that may be ITGB express differently across sample.And the FeatureHeatmap and Heamap also comfirm this phenomeno.We try the other four variable genes,which has the similar result as gene ITGB4 But there is a problem that there is a small number of control sample.May be it will cause biases in analysis.
Next,we will have analysis on gene differential expression.Find maker genes across sample
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## ISCU 1.161711e-08 2.3553312 0.154 0.412 0.0002353511 Negative ISCU
## TBC1D5 2.478029e-07 -1.2420303 0.154 0.529 0.0050202396 Negative TBC1D5
## ANKRD49 3.707230e-07 1.1004516 0.154 0.471 0.0075104780 Negative ANKRD49
## EDRF1 3.719802e-07 3.3860471 0.154 0.353 0.0075359476 Negative EDRF1
## CFLAR 3.992065e-07 -0.9794527 0.231 0.706 0.0080875250 Negative CFLAR
## SEPT10 4.265138e-07 0.6939504 0.231 0.412 0.0086407425 Negative SEPT10
## [1] TRUE
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## ITGB4 0.004310014 0.655698 0.444 0.143 1 Positive ITGB4
After the Differential expression analysis,we can get the the marker genes in differential expression across control condition.The result is matrix contained *p_val,avg_logFC,p_val_adj, gene ..*.Of course the **ITGB4** gene is in the result matrix,But it is not significant(**p_val_adj>0.05**)
across sample
## There ara 4 conditions
## Whether creat data accurate 0
We randomly select the genes(must containe gene **ITGB4**) from result of Differential expression.Draw the heatmap,the plot tells us that these genes actually express differently across sample.
When use the DESeq,it must require the gene count matrix satisify that: every gene contains at least one zero, cannot compute log geometric means. So have to take another method to handle data,but I do not know whether it is reasonable.Just try!!!
condition.1 <- unlist(lapply(colnames(human.only.pro), function(x) return(str_split(x,
"_")[[1]][1])))
condition.2 <- unlist(lapply(colnames(human.only.pro), function(x) return(str_split(x,
"_")[[1]][2])))
xdds <- DESeq_CT(count.data = human.only.pro[1:100, ], condition.1 = condition.1)
plotDispEsts(xdds)
res <- results(xdds, contrast = c("condition.1", "hc001", "hc006"))
res[which(res$padj < 0.05), ]
## log2 fold change (MLE): condition.1 hc001 vs hc006
## Wald test p-value: condition.1 hc001 vs hc006
## DataFrame with 15 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## A4GALT 2.532595 -1.535869 0.4683720 -3.279165 1.041148e-03
## AAAS 2.445982 -2.541349 0.4594577 -5.531194 3.180584e-08
## AAED1 23.956319 -3.571972 0.6732471 -5.305589 1.123100e-07
## AAGAB 6.847168 -3.127263 0.6344760 -4.928891 8.269786e-07
## AAK1 8.223328 -1.940431 0.5974624 -3.247788 1.163059e-03
## ... ... ... ... ... ...
## ABAT 3.038498 -2.295596 0.5403253 -4.248545 2.151632e-05
## ABCA1 14.669886 -2.069920 0.6003567 -3.447817 5.651360e-04
## ABCA13 1.589247 -1.647628 0.4809320 -3.425905 6.127537e-04
## ABCA6 2.547768 -1.534404 0.5275814 -2.908373 3.633142e-03
## ABCB7 6.032290 -3.537115 0.6365307 -5.556864 2.746640e-08
## padj
## <numeric>
## A4GALT 5.552787e-03
## AAAS 5.088935e-07
## AAED1 1.437568e-06
## AAGAB 8.821105e-06
## AAK1 5.725828e-03
## ... ...
## ABAT 1.530050e-04
## ABCA1 3.565112e-03
## ABCA13 3.565112e-03
## ABCA6 1.660865e-02
## ABCB7 5.088935e-07
the table are the result which gene’s padj value<0.05.Include the variable:baseMean,log2FoldChange lfcSE,stat,pvalue,padj.
library(VennDiagram)
r <- DESeq_result(xdds, condition = condition.1)
x <- as.vector(r)
grid.draw(venn.diagram(x[1:3], filename = NULL, fill = c("dodgerblue", "goldenrod1",
"darkorange1")))
grid.draw(venn.diagram(x[1:4], filename = NULL, fill = c("dodgerblue", "goldenrod1",
"darkorange1", "darkorchid1")))
Venn diagram show that the common differentially expressing genes across gorups.The number represents the the number of common genes.Here just show the randomly selected groups